##############################################################################################
#                               Computing Portfolio Data for Login Days
#
#
# The 'new_acc_clean_Tr' folder created with 'cleaning_data1.R' houses portfolio data specific to each investor account on their respective transaction days. 
# This script's aim is to generate portfolio data for each account on all their login days.
# Login days are in stored in a separate folder (output--logins)
#
# This script applies a particular function (extract_port_on_many_days) to each account in 'new_acc_clean_Tr' folder, integrating their information with login dates. 
# The result is a long panel that compiles portfolio data for every 'anon' account on all login days, which also include the transaction days. 
# In the  processed data, each observation uniquely represents a combination of a stock, date, and account.
#
# The long panel generated by applying the above function is stored in the file named "310319sell_day_portfolios_newacc_BIG_1_to_10000.csv". 
# (Note that to expedite the processing, we have divided the workload between two virtual machines, as explained in the "cleaning_data1.R" file. 
# The name of the large file being processed on the second virtual machine using this code is "310319sell_day_portfolios_newacc_BIG_10000_to_more.csv")
#
##############################################################################################


rm(list = ls(all=TRUE))
library(data.table)
library(readr)
library(zoo) 

memory.limit(size=10000000)
memory.limit()
Sys.setenv(LANG="en")
Sys.setlocale("LC_TIME","us")

wd <- as.character("D:/settings/beset/Desktop/output_new_acc/new_acc_clean_Tr/") 
wd_location_logins2 <- as.character("C:/Barclays/raw_data_September_2016/output--logins/output--logins/")


#-----------------------------------------------------------
# Reading files cleaned with "cleaning_data1.R"
#-----------------------------------------------------------


setwd(wd)

# Checking we retained accounts that did not show a disagreement with the valuation/holdings files

record.adjusted <- fread("adjust_record.csv")
unique(record.adjusted[,.(anon)]) 
record.adjusted[is.na(diff)]
record.adjusted[diff==0]
omit_anon_valuation_before_open_date <- unique(record.adjusted[!is.na(diff) & diff!=0][,.(anon)])

files <- list.files(pattern="clean")
d.t.files <- as.data.table(parse_number(files)) 
d.t.files <- cbind(d.t.files, data.table(files))
d.t.files <- d.t.files[(!d.t.files$V1 %in% omit_anon_valuation_before_open_date$anon)] 

files <- d.t.files$files #example of a file name: "clean_136918.csv"
length(files)  


extract_anon_id <- function(x)
{
  unlist(strsplit(unlist(strsplit(x, split="_"))[2] , split=".c"))[1]
}
all_files <- data.table(file=files)
all_files[,anon:=unlist(lapply(files, extract_anon_id))] #identical to d.t.files


#-----------------------------------------------------------
# Extracting sell dates
#-----------------------------------------------------------

results <- data.table()

for(i in 1:nrow(all_files))
{
  file <- all_files[i,file]
  data <- read.csv(file)  
  data <- data.table(data) 
  data <- unique(data, by="anon_date")
  data <- data[num_sales_the_day_LSE>=1] 
  data <- data[,.(Date, anon, anon_date, new_ac)]
  l <- list(results, data)
  results <- rbindlist(l)
  print(c(i, format(Sys.time(), "%X")))
}

results<- results[!anon %in% as.numeric(omit_anon_valuation_before_open_date$anon)]


#-----------------------------------------------------------
#  Reading login days
#-----------------------------------------------------------


setwd(wd_location_logins2)

login.files <- list.files(pattern=".csv")

library(readr)
d.t.login.files <- as.data.table(parse_number(login.files))
d.t.login.files <- cbind(d.t.login.files, data.table(login.files))
setnames(d.t.login.files, "V1", "anon")
d.t.login.files[, anon:=as.character(anon)]

# merging  transaction days and login days

all_files<-merge(all_files, d.t.login.files, by=c("anon"), all.x=TRUE)
login.files<-NULL
all_files_w.login <- all_files[!is.na(login.files)] 

data_sample_append<-NULL

for (file_i in 1:nrow(all_files_w.login)) {
  
  anon.login= all_files_w.login[file_i,anon]
  
  file.login= all_files_w.login[file_i,login.files]
  data.login <- fread(file.login)
  
  data.login[,date.login:=as.Date(d,"%d/%m/%Y" )]
  data.login[,anon:=anon.login ]
  data.login <- unique(data.login[,.(anon, date.login)])
  
  data_sample_append <- rbind(data_sample_append, data.login)
  print(file_i)
}

login.dates<- data_sample_append 

used_anons<- unique(login.dates$anon) 

days <- results #transaction days

used_anons2<- unique(days$anon)

login_dates_used<- login.dates[,.(anon,date.login)  ]
login_dates_used <- login_dates_used[anon %in% used_anons2]

potential_dates<-login_dates_used 
potential_dates<-unique(potential_dates) 
setnames(potential_dates, "date.login", "Date")

new_times<- potential_dates[anon==anon_id]
new_times[, Date:=as.Date(Date, "%Y-%m-%d")]
new_times<-new_times$Date

setwd(wd)


#-----------------------------------------------------------
# function to reconstruct end-day portfolios
#-----------------------------------------------------------

extract_port_on_many_days <- function(anon_id) 
{
  
  file <- all_files[anon==anon_id, file]
  data <- read.csv(file)
  data <- data.table(data)
  
  if(nrow(data)==0){data.table()}else{
    
    data <- data[order(sedol, date, transaction_time)]
    data[,num:=seq(1,nrow(.SD)), by=list(sedol, date)]
    data[,max.num:=max(.SD[,num]), by=list(sedol, date)]
    data <- data[num==max.num]
    
    if(nrow(data)==0){return(data.table())}else{
      
      data[,date:=as.Date(date, "%Y-%m-%d")]
      
      total_number_tra<- nrow(unique(data[narrative=="BUY" | narrative=="SELL",.(date)]))
      
      data[, current_date:=date]
      static_information_of_stocks <- data[,head(.SD,1), by= sedol ][,.(isin, Code_used_DS,sedol,valuation,
                                                                        B_asset_class,
                                                                        anon,
                                                                        new_ac,  name,
                                                                        currency, DS_STOCK_TYPE, DS_SECURITY_TYPE, DS_GEOGRAPHY_GROUP_NAME)]
      
      (positions_account <- data[, .(sedol,  net.posit, prior.posit,  current_date, 
                                     narrative, unit_price,  
                                     quantity, quantity_buy_sell, upp,
                                     qwapp, qwapp_bef, unit_price, app, af)]) #app is the price, which takes into account any stock splits; upp is the unadjusted price
      
      
      (positions_account<- positions_account[order(current_date, sedol)])
      
      
      # We're defining the specific dates for which we want to examine portfolios: these include all transaction dates, along with the login dates 
      
      column_name1 ="sedol"
      column_name2 ="current_date" 
      (Stocks = unique(positions_account[, get(column_name1)   ]))
      (Times = unique(positions_account[, get(column_name2)])) 
      
      Times<-unique(c(Times,new_times)   ) 
      Times<-sort(Times, increasing=T)
      
      # We're augmenting the data on each stock to include all relevant dates (dates in Times)
      
      (Full_Panel = data.table(expand.grid(Stocks, Times)))
      setnames(Full_Panel, c(column_name1, column_name2))
      setkeyv(Full_Panel, c(column_name1, column_name2))
      setkeyv(positions_account, c(column_name1, column_name2))
      (positions_account <- positions_account[Full_Panel][!is.na(sedol)])
      
      positions_account[!is.na(net.posit)]
      positions_account[, net.posit:= as.numeric(net.posit)]
      positions_account<-positions_account[order(sedol, current_date)]
      
      # we move the positions forward, and then we move them backward. We are able to move positions from transaction days because on days without any transactions, the positions remain unchanged. 
      
      cols <- c( "net.posit")
      (positions_account[, (cols) := na.locf(.SD, na.rm = F), by=sedol,  .SDcols = cols]) 
      
      cols <- c( "prior.posit") 
      (positions_account[, (cols) := na.locf(.SD, na.rm = F), by=sedol,  .SDcols = cols]) 
      
      cols <- c( "quantity") 
      (positions_account[, (cols) := na.locf(.SD, na.rm = F), by=sedol,  .SDcols = cols]) 
      
      cols <- c( "qwapp") 
      (positions_account[, (cols) := na.locf(.SD, na.rm = F), by=sedol,  .SDcols = cols]) 
      
      cols <- c( "qwapp_bef") 
      (positions_account[, (cols) := na.locf(.SD, na.rm = F), by=sedol,  .SDcols = cols]) 
      
      cols <- c( "unit_price") 
      (positions_account[, (cols) := na.locf(.SD, na.rm = F), by=sedol,  .SDcols = cols]) 
      
      cols <- c( "app") 
      (positions_account[, (cols) := na.locf(.SD, na.rm = F), by=sedol,  .SDcols = cols]) 
      
      cols <- c( "upp") 
      (positions_account[, (cols) := na.locf(.SD, na.rm = F), by=sedol,  .SDcols = cols]) 
      
      cols <- c( "af")
      (positions_account[, (cols) := na.locf(.SD, na.rm = F), by=sedol,  .SDcols = cols]) 
      
      positions_account[!is.na(narrative), has_narrative:=1]
      positions_account[abs(net.posit*upp)<.01, net.posit:=0]
      positions_account[abs(prior.posit*upp)<.01, prior.posit:=0]
      positions_account<-unique(positions_account[!is.na(net.posit)])
      positions_account1  <- positions_account
      positions_account1[!is.na(narrative), a_transaction_date:=(current_date)]
      
      cols <- c( "a_transaction_date")
      (positions_account1[, nearest.td:= na.locf(.SD, na.rm = F), by=sedol,  .SDcols = cols]) 
      (positions_account1[, nearest.td:=as.Date(nearest.td, "%Y-%m-%d")])
      positions_account1[ , s.nearest.td0:=shift(nearest.td,1,NA,"lag") ,by=.(sedol)] 
      positions_account1[ current_date!= nearest.td, s.nearest.td0:=NA] 
      
      cols <- c( "s.nearest.td0")
      (positions_account1[, s.nearest.td:= na.locf(.SD, na.rm = F), by=sedol,  .SDcols = cols]) 
      positions_account1[ ,nearest.td.for.overal:=max(.SD[,nearest.td], na.rm=T), by=.(current_date)]
      
      cols <- c( "nearest.td.for.overal")
      (positions_account1[, nearest.td.overal:= na.locf(.SD, na.rm = F), by=sedol,  .SDcols = cols]) 
      
      
      for_lag<-unique(positions_account1[,.(nearest.td.overal, current_date)])[order(current_date)]
      for_lag[ , s.nearest.td.overal0:=shift(nearest.td.overal,1,NA,"lag") ] 
      for_lag[, nearest.td.overal:=NULL]
      positions_account1<- merge(positions_account1, for_lag, by=c("current_date"), all.x=T)
      
      positions_account1[ nearest.td.overal!= current_date, s.nearest.td.overal0:=NA] 
      (positions_account1[, s.nearest.td.overal:= s.nearest.td.overal0]) 
      positions_account1<-positions_account1[order(sedol, current_date)]
      
      
      positions_account1[prior.posit==0 & narrative=="BUY", pur_day:=current_date]
      positions_account1[,pur_day:=na.locf(pur_day,  na.rm=FALSE),by=sedol]
      
      positions_account1[,.(sedol, net.posit, pur_day, current_date, a_transaction_date, nearest.td, s.nearest.td, nearest.td.overal, s.nearest.td.overal)]
      
      positions_account1[, port_date:=current_date]
      positions_account1[,last_td:=nearest.td]
      (positions_account1 <- merge(positions_account1, static_information_of_stocks, by=c("sedol" ), all.x=TRUE))
      
      
      cols <- c( "narrative")
      (positions_account1[, narrative1:= na.locf(.SD, na.rm = F), by=sedol,  .SDcols = cols]) 
      positions_account1[,narrative:=narrative1]
      
      positions_account1<- positions_account1[net.posit>0 | has_narrative==1]
      positions_account1[, number_tr_period:=total_number_tra]
      
      positions_account1[, a_login:=0]
      positions_account1[port_date %in% new_times, a_login:=1]
      positions_account1[,.(number_tr_period, narrative, anon, port_date, Code_used_DS, sedol, isin, name, net.posit, 
                            prior.posit, qwapp, qwapp_bef, last_td, quantity, unit_price, B_asset_class,
                            currency, DS_STOCK_TYPE, DS_SECURITY_TYPE, DS_GEOGRAPHY_GROUP_NAME, new_ac, app, af,
                            s.nearest.td ,nearest.td.overal ,s.nearest.td.overal ,   pur_day, a_login)]  
      
      
      
    }
  }
}




setwd(wd)

# checking function
trying2 <- extract_port_on_many_days(anon_id=152743)
# looks fine

#-----------------------------------------------------------
# The function is applied to each transaction day, resulting in a long panel that includes portfolio data for all 'anon' identifiers and login dates.
#-----------------------------------------------------------

potential_dates
accounts_in_potential_dates<- unique(potential_dates$anon)
temp_list <- list() 
for(i in 1:length(accounts_in_potential_dates))
{
  a <- accounts_in_potential_dates[i]
  sub <- extract_port_on_many_days(anon_id=a)
  temp_list <- append(temp_list, list(sub)) 
  print(c(i, format(Sys.time(), "%X")))
}

data <- rbindlist(temp_list) 


data[,anon_date:=paste(anon, "_", port_date, sep="")]

data[,same_day_purchase:=ifelse(port_date==last_td & narrative=="BUY" & quantity>0, 1, 0)] # equal to 1 for the related sedol after a purchase
inc <- unique(data[same_day_purchase==1, anon_date])
data[,same_day_purchase_port:=ifelse(anon_date %in% inc, 1, 0)] # equal to 1 for all the sedols in the purchase day

data[,sell:=ifelse(port_date==last_td & narrative=="SELL" & quantity<0, 1, 0)]
data[,num_sales_the_day:=sum(.SD[,sell]), by=anon_date]

data[,buy:=ifelse(port_date==last_td & narrative=="BUY" & quantity>0, 1, 0)]
data[,num_purchases_the_day:=sum(.SD[,buy]), by=anon_date]

data[,num_sales_the_day_LSE:=sum(.SD[!is.na(currency) & currency=="GBP" & DS_STOCK_TYPE=="EQ" &  
                                       DS_SECURITY_TYPE=="Ordinary Shares", sell]), by=anon_date]

data[,num_purchases_the_day_LSE:=sum(.SD[!is.na(currency) & currency=="GBP" & DS_STOCK_TYPE=="EQ" &  
                                           DS_SECURITY_TYPE=="Ordinary Shares", buy]), by=anon_date]

output_file <- file.path("D:/settings/beset/Desktop/output_new_acc/new_acc_only_sell_days/", "310319sell_day_portfolios_newacc_BIG_1_to_10000.csv")
write.table(data, file=output_file, sep=',', row.names=F, col.names=T)







